notebook.community



In [7]:

    
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.cross_validation import train_test_split
from textblob import TextBlob,Word
from sklearn.neighbors import KNeighborsClassifier
import pandas as pd









    



C:\Anaconda3\lib\site-packages\sklearn\cross_validation.py:44: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.
  "This module will be removed in 0.20.", DeprecationWarning)



In [8]:

    
train = pd.read_csv('files/restaurant_review1.csv', encoding = "ISO-8859-1")



In [9]:

    
train=train.dropna(axis=0)



In [10]:

    
def stemming_tokenizer(text):
    stopwords = []
    with open('files/new_sw.txt','r+') as f:
        stopword = f.readlines()
        for sw in stopword:
            sw = sw[:-1]
            stopwords.append(sw)
    f.close()
    text = text.lower()
    words = TextBlob(text).correct().words
    words = [Word(w).lemmatize("v") for w in words if not w in stopwords]
    return words

Classifier for food



In [31]:

    
food_classifier = Pipeline([
        ('vectorizer', CountVectorizer(tokenizer=stemming_tokenizer,ngram_range=(1,3))),
        ('tfidf', TfidfTransformer()),
        ('classifier', KNeighborsClassifier(n_neighbors=5)),
        ])



In [36]:

    
X_train, X_test, y_train, y_test = train_test_split(train['review'],train['Food'], test_size=0.25, random_state=33)



In [37]:

    
classifier=food_classifier.fit(X_train, y_train)
print("Accuracy: %s" % classifier.score(X_test, y_test))









    



Accuracy: 0.697674418605

Classifier for service



In [38]:

    
service_classifier = Pipeline([
        ('vectorizer', CountVectorizer(tokenizer=stemming_tokenizer,ngram_range=(1,3))),
        ('tfidf', TfidfTransformer()),
        ('classifier', KNeighborsClassifier(n_neighbors=5)),
        ])

X_train, X_test, y_train, y_test = train_test_split(train['review'],train['Service'], test_size=0.25, random_state=33)

classifier=service_classifier.fit(X_train, y_train)
print("Accuracy: %s" % classifier.score(X_test, y_test))









    



Accuracy: 0.697674418605

Classifier for ambience



In [39]:

    
amb_classifier = Pipeline([
        ('vectorizer', CountVectorizer(tokenizer=stemming_tokenizer,ngram_range=(1,3))),
        ('tfidf', TfidfTransformer()),
        ('classifier', KNeighborsClassifier(n_neighbors=5)),
        ])

X_train, X_test, y_train, y_test = train_test_split(train['review'],train['Ambience'], test_size=0.25, random_state=33)

classifier=amb_classifier.fit(X_train, y_train)
print("Accuracy: %s" % classifier.score(X_test, y_test))









    



Accuracy: 0.720930232558

Classifier for deals



In [40]:

    
deal_classifier = Pipeline([
        ('vectorizer', CountVectorizer(tokenizer=stemming_tokenizer,ngram_range=(1,3))),
        ('tfidf', TfidfTransformer()),
        ('classifier', KNeighborsClassifier(n_neighbors=5)),
        ])

X_train, X_test, y_train, y_test = train_test_split(train['review'],train['deal'], test_size=0.25, random_state=33)

classifier=deal_classifier.fit(X_train, y_train)

print("Accuracy: %s" % classifier.score(X_test, y_test))









    



Accuracy: 0.860465116279

get reviews from a restaurant with id = res_id



In [44]:

    
df = pd.read_csv('files/reviews.csv', encoding = "ISO-8859-1")



In [119]:

    
df.head()









    Out[119]:







  
    
      
      funny
      rating
      user_id
      review
      restaurant_id
      review_id
      date
      cool
      useful
    
  
  
    
      0
      0
      5
      24538
      My wife took me here on my birthday for breakf...
      3010
      1
      2011-01-26
      2
      5
    
    
      1
      0
      5
      40413
      I have no idea why some people give bad review...
      1191
      2
      2011-07-27
      0
      0
    
    
      2
      0
      4
      36383
      love the gyro plate. Rice is so good and I als...
      1989
      3
      2012-06-14
      0
      1
    
    
      3
      1
      4
      25790
      Quiessence is, simply put, beautiful.  Full wi...
      3566
      4
      2007-12-13
      4
      3
    
    
      4
      4
      5
      16256
      Drop what you're doing and drive here. After I...
      1019
      5
      2010-02-12
      7
      7



In [120]:

    
def getReview(res_id):
    reviews = []
    
    rev = df[(df['restaurant_id']==res_id) & (df['useful']>0)]['review']
    for r in rev:
        reviews.append(r)
    return reviews



In [46]:

    
rev = getReview(3)



In [53]:

    
pdf= pd.read_csv('files/restaurants.csv',encoding = "ISO-8859-1")



In [56]:

    
pred= pdf[['new_id']]



In [63]:

    
pred.columns=['restaurant_id']



In [64]:

    
pred['food']=-1









    



C:\Anaconda3\lib\site-packages\ipykernel\__main__.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':



In [65]:

    
pred['service']=-1
pred['ambience']=-1
pred['deals']=-1









    



C:\Anaconda3\lib\site-packages\ipykernel\__main__.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':
C:\Anaconda3\lib\site-packages\ipykernel\__main__.py:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app
C:\Anaconda3\lib\site-packages\ipykernel\__main__.py:3: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()



In [3]:

    
import pickle



In [136]:

    
def save_obj(obj, name ):
    with open( name + '.pkl', 'wb') as f:
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)



In [144]:

    
save_obj(food_classifier, 'food_classifier')
save_obj(service_classifier, 'service_classifier')
save_obj(amb_classifier, 'amb_classifier')
save_obj(deal_classifier, 'deal_classifier')



In [5]:

    
def load_obj(name ):
    with open( name + '.pkl', 'rb') as f:
        return pickle.load(f)



In [11]:

    
f= load_obj('food_classifier')
s= load_obj('service_classifier')
a= load_obj('amb_classifier')
d= load_obj('deal_classifier')



In [25]:

    
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score



In [27]:

    
X_train, X_test, y_train, y_test = train_test_split(train['review'],train['Food'], test_size=0.25, random_state=33)
print(classification_report(y_test,f.predict(X_test)))









    



             precision    recall  f1-score   support

        0.0       0.75      0.35      0.48        17
        1.0       0.69      0.92      0.79        26

avg / total       0.71      0.70      0.67        43



In [29]:

    
X_train, X_test, y_train, y_test = train_test_split(train['review'],train['Service'], test_size=0.25, random_state=33)
print(classification_report(y_test,s.predict(X_test)))









    



             precision    recall  f1-score   support

        0.0       0.76      0.83      0.79        30
        1.0       0.50      0.38      0.43        13

avg / total       0.68      0.70      0.69        43



In [30]:

    
X_train, X_test, y_train, y_test = train_test_split(train['review'],train['Ambience'], test_size=0.25, random_state=33)
print(classification_report(y_test,a.predict(X_test)))









    



             precision    recall  f1-score   support

        0.0       0.70      1.00      0.82        28
        1.0       1.00      0.20      0.33        15

avg / total       0.80      0.72      0.65        43



In [31]:

    
X_train, X_test, y_train, y_test = train_test_split(train['review'],train['deal'], test_size=0.25, random_state=33)
print(classification_report(y_test,d.predict(X_test)))









    



             precision    recall  f1-score   support

        0.0       0.85      1.00      0.92        35
        1.0       1.00      0.25      0.40         8

avg / total       0.88      0.86      0.82        43



In [132]:

    
def getPrediction(id):
    rev= getReview(id)
    food_count = 0 
    serv_count = 0
    amb_count = 0
    deal_count = 0

    for review in rev:
        prediction = {}
        prediction["food"] = food_classifier.predict([review])[0]
        prediction["service"] = service_classifier.predict([review])[0]
        prediction["amb"] = amb_classifier.predict([review])[0]
        prediction["deals"] = deal_classifier.predict([review])[0]

        if(prediction["food"]==1):
            food_count += 1
        if(prediction["service"]==1):
            serv_count += 1
        if(prediction["amb"]==1):
            amb_count += 1
        if(prediction["deals"]==1):
            deal_count += 1

    #     print(prediction)

    rev_size = len(rev)
    if(rev_size> 0 ):
        food_per = food_count/rev_size*100
        serv_per = serv_count/rev_size*100
        amb_per = amb_count/rev_size*100
        deal_per = deal_count/rev_size*100
    else:
        food_per= 0 
        serv_per=0
        amb_per=0
        deal_per=0 

    # print(rev_size,food_per,serv_per,amb_per,deal_per)
    return (rev_size,food_per,serv_per,amb_per,deal_per)



In [127]:

    
for id in pred[pred['food']==-1].restaurant_id:
    i,a,b,c,d= getPrediction(id)
    pred.iloc[id-1,1]= a
    pred.iloc[id-1,2]= b 
    pred.iloc[id-1,3]= c 
    pred.iloc[id-1,4]= d 
    print(i)









    



---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-127-afe62aa5d5d9> in <module>()
      1 for id in pred[pred['food']==-1].restaurant_id:
----> 2     i,a,b,c,d= getPrediction(id)
      3     pred.iloc[id-1,1]= a
      4     pred.iloc[id-1,2]= b
      5     pred.iloc[id-1,3]= c

<ipython-input-121-bc461ddd612a> in getPrediction(id)
     10         prediction["food"] = food_classifier.predict([review])[0]
     11         prediction["service"] = service_classifier.predict([review])[0]
---> 12         prediction["amb"] = amb_classifier.predict([review])[0]
     13         prediction["deals"] = deal_classifier.predict([review])[0]
     14 

C:\Anaconda3\lib\site-packages\sklearn\utils\metaestimators.py in <lambda>(*args, **kwargs)
     52 
     53         # lambda, but not partial, allows help() to work with update_wrapper
---> 54         out = lambda *args, **kwargs: self.fn(obj, *args, **kwargs)
     55         # update the docstring of the returned function
     56         update_wrapper(out, self.fn)

C:\Anaconda3\lib\site-packages\sklearn\pipeline.py in predict(self, X)
    324         for name, transform in self.steps[:-1]:
    325             if transform is not None:
--> 326                 Xt = transform.transform(Xt)
    327         return self.steps[-1][-1].predict(Xt)
    328 

C:\Anaconda3\lib\site-packages\sklearn\feature_extraction\text.py in transform(self, raw_documents)
    891 
    892         # use the same matrix-building strategy as fit_transform
--> 893         _, X = self._count_vocab(raw_documents, fixed_vocab=True)
    894         if self.binary:
    895             X.data.fill(1)

C:\Anaconda3\lib\site-packages\sklearn\feature_extraction\text.py in _count_vocab(self, raw_documents, fixed_vocab)
    760         for doc in raw_documents:
    761             feature_counter = {}
--> 762             for feature in analyze(doc):
    763                 try:
    764                     feature_idx = vocabulary[feature]

C:\Anaconda3\lib\site-packages\sklearn\feature_extraction\text.py in <lambda>(doc)
    239 
    240             return lambda doc: self._word_ngrams(
--> 241                 tokenize(preprocess(self.decode(doc))), stop_words)
    242 
    243         else:

<ipython-input-30-3182fd224a51> in stemming_tokenizer(text)
      8     f.close()
      9     text = text.lower()
---> 10     words = TextBlob(text).correct().words
     11     words = [Word(w).lemmatize("v") for w in words if not w in stopwords]
     12     return words

C:\Anaconda3\lib\site-packages\textblob\blob.py in correct(self)
    553         tokens = nltk.tokenize.regexp_tokenize(self.raw, "\w+|[^\w\s]|\s")
    554         corrected = (Word(w).correct() for w in tokens)
--> 555         ret = ''.join(corrected)
    556         return self.__class__(ret)
    557 

C:\Anaconda3\lib\site-packages\textblob\blob.py in <genexpr>(.0)
    552         # regex matches: word or punctuation or whitespace
    553         tokens = nltk.tokenize.regexp_tokenize(self.raw, "\w+|[^\w\s]|\s")
--> 554         corrected = (Word(w).correct() for w in tokens)
    555         ret = ''.join(corrected)
    556         return self.__class__(ret)

C:\Anaconda3\lib\site-packages\textblob\blob.py in correct(self)
    125         .. versionadded:: 0.6.0
    126         '''
--> 127         return Word(self.spellcheck()[0][0])
    128 
    129     @cached_property

C:\Anaconda3\lib\site-packages\textblob\blob.py in spellcheck(self)
    117         .. versionadded:: 0.6.0
    118         '''
--> 119         return suggest(self.string)
    120 
    121     def correct(self):

C:\Anaconda3\lib\site-packages\textblob\en\__init__.py in suggest(w)
    121     """ Returns a list of (word, confidence)-tuples of spelling corrections.
    122     """
--> 123     return spelling.suggest(w)
    124 
    125 def polarity(s, **kwargs):

C:\Anaconda3\lib\site-packages\textblob\_text.py in suggest(self, w)
   1396         candidates = self._known([w]) \
   1397                   or self._known(self._edit1(w)) \
-> 1398                   or self._known(self._edit2(w)) \
   1399                   or [w]
   1400         candidates = [(self.get(c, 0.0), c) for c in candidates]

C:\Anaconda3\lib\site-packages\textblob\_text.py in _edit2(self, w)
   1373         # Of all spelling errors, 99% is covered by edit distance 2.
   1374         # Only keep candidates that are actually known words (20% speedup).
-> 1375         return set(e2 for e1 in self._edit1(w) for e2 in self._edit1(e1) if e2 in self)
   1376 
   1377     def _known(self, words=[]):

C:\Anaconda3\lib\site-packages\textblob\_text.py in <genexpr>(.0)
   1373         # Of all spelling errors, 99% is covered by edit distance 2.
   1374         # Only keep candidates that are actually known words (20% speedup).
-> 1375         return set(e2 for e1 in self._edit1(w) for e2 in self._edit1(e1) if e2 in self)
   1376 
   1377     def _known(self, words=[]):

C:\Anaconda3\lib\site-packages\textblob\_text.py in _edit1(self, w)
   1363             [a + b[1:] for a, b in split if b],
   1364             [a + b[1] + b[0] + b[2:] for a, b in split if len(b) > 1],
-> 1365             [a + c + b[1:] for a, b in split for c in Spelling.ALPHA if b],
   1366             [a + c + b[0:] for a, b in split for c in Spelling.ALPHA]
   1367         )

C:\Anaconda3\lib\site-packages\textblob\_text.py in <listcomp>(.0)
   1363             [a + b[1:] for a, b in split if b],
   1364             [a + b[1] + b[0] + b[2:] for a, b in split if len(b) > 1],
-> 1365             [a + c + b[1:] for a, b in split for c in Spelling.ALPHA if b],
   1366             [a + c + b[0:] for a, b in split for c in Spelling.ALPHA]
   1367         )

KeyboardInterrupt:



In [125]:

    
rev = getReview(96)



In [126]:

    
len(rev)









    Out[126]:





244



In [131]:

    
pred.head()









    Out[131]:







  
    
      
      restaurant_id
      food
      service
      ambience
      deals
    
  
  
    
      0
      1
      100.000000
      23.076923
      0.000000
      0.0
    
    
      1
      2
      62.500000
      25.000000
      12.500000
      0.0
    
    
      2
      3
      100.000000
      25.000000
      0.000000
      0.0
    
    
      3
      4
      100.000000
      4.000000
      6.000000
      0.0
    
    
      4
      5
      90.909091
      31.818182
      4.545455
      0.0



In [130]:

    
pred[(pred['food']==-1)]









    Out[130]:







  
    
      
      restaurant_id
      food
      service
      ambience
      deals
    
  
  
    
      95
      96
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      96
      97
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      97
      98
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      98
      99
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      99
      100
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      100
      101
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      101
      102
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      102
      103
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      103
      104
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      104
      105
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      105
      106
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      106
      107
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      107
      108
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      108
      109
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      109
      110
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      110
      111
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      111
      112
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      112
      113
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      113
      114
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      114
      115
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      115
      116
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      116
      117
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      117
      118
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      118
      119
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      119
      120
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      120
      121
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      121
      122
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      122
      123
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      123
      124
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      124
      125
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      ...
      ...
      ...
      ...
      ...
      ...
    
    
      4473
      4474
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      4474
      4475
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      4475
      4476
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      4476
      4477
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      4477
      4478
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      4478
      4479
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      4479
      4480
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      4480
      4481
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      4481
      4482
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      4482
      4483
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      4483
      4484
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      4484
      4485
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      4485
      4486
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      4486
      4487
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      4487
      4488
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      4488
      4489
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      4489
      4490
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      4490
      4491
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      4491
      4492
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      4492
      4493
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      4493
      4494
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      4494
      4495
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      4495
      4496
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      4496
      4497
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      4497
      4498
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      4498
      4499
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      4499
      4500
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      4500
      4501
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      4501
      4502
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      4502
      4503
      -1.0
      -1.0
      -1.0
      -1.0
    
  

4408 rows × 5 columns



In [129]:

    
pred.to_csv("prediction.csv", index=False)



In [149]:

    
pred[(pred['food']==-1)& (pred['restaurant_id']>=1000)]









    Out[149]:







  
    
      
      restaurant_id
      food
      service
      ambience
      deals
    
  
  
    
      999
      1000
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      1000
      1001
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      1001
      1002
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      1002
      1003
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      1003
      1004
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      1004
      1005
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      1005
      1006
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      1006
      1007
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      1007
      1008
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      1008
      1009
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      1009
      1010
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      1010
      1011
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      1011
      1012
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      1012
      1013
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      1013
      1014
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      1014
      1015
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      1015
      1016
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      1016
      1017
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      1017
      1018
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      1018
      1019
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      1019
      1020
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      1020
      1021
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      1021
      1022
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      1022
      1023
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      1023
      1024
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      1024
      1025
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      1025
      1026
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      1026
      1027
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      1027
      1028
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      1028
      1029
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      ...
      ...
      ...
      ...
      ...
      ...
    
    
      4473
      4474
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      4474
      4475
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      4475
      4476
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      4476
      4477
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      4477
      4478
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      4478
      4479
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      4479
      4480
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      4480
      4481
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      4481
      4482
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      4482
      4483
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      4483
      4484
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      4484
      4485
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      4485
      4486
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      4486
      4487
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      4487
      4488
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      4488
      4489
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      4489
      4490
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      4490
      4491
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      4491
      4492
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      4492
      4493
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      4493
      4494
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      4494
      4495
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      4495
      4496
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      4496
      4497
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      4497
      4498
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      4498
      4499
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      4499
      4500
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      4500
      4501
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      4501
      4502
      -1.0
      -1.0
      -1.0
      -1.0
    
    
      4502
      4503
      -1.0
      -1.0
      -1.0
      -1.0
    
  

3504 rows × 5 columns



In [ ]:



In [ ]:

	funny	rating	user_id	review	restaurant_id	review_id	date	cool	useful
0	0	5	24538	My wife took me here on my birthday for breakf...	3010	1	2011-01-26	2	5
1	0	5	40413	I have no idea why some people give bad review...	1191	2	2011-07-27	0	0
2	0	4	36383	love the gyro plate. Rice is so good and I als...	1989	3	2012-06-14	0	1
3	1	4	25790	Quiessence is, simply put, beautiful. Full wi...	3566	4	2007-12-13	4	3
4	4	5	16256	Drop what you're doing and drive here. After I...	1019	5	2010-02-12	7	7

	restaurant_id	food	service	ambience
0	1	100.000000	23.076923	0.000000
1	2	62.500000	25.000000	12.500000
2	3	100.000000	25.000000	0.000000
3	4	100.000000	4.000000	6.000000
4	5	90.909091	31.818182	4.545455

	restaurant_id	food	service	ambience	deals
95	96	-1.0	-1.0	-1.0	-1.0
96	97	-1.0	-1.0	-1.0	-1.0
97	98	-1.0	-1.0	-1.0	-1.0
98	99	-1.0	-1.0	-1.0	-1.0
99	100	-1.0	-1.0	-1.0	-1.0
100	101	-1.0	-1.0	-1.0	-1.0
101	102	-1.0	-1.0	-1.0	-1.0
102	103	-1.0	-1.0	-1.0	-1.0
103	104	-1.0	-1.0	-1.0	-1.0
104	105	-1.0	-1.0	-1.0	-1.0
105	106	-1.0	-1.0	-1.0	-1.0
106	107	-1.0	-1.0	-1.0	-1.0
107	108	-1.0	-1.0	-1.0	-1.0
108	109	-1.0	-1.0	-1.0	-1.0
109	110	-1.0	-1.0	-1.0	-1.0
110	111	-1.0	-1.0	-1.0	-1.0
111	112	-1.0	-1.0	-1.0	-1.0
112	113	-1.0	-1.0	-1.0	-1.0
113	114	-1.0	-1.0	-1.0	-1.0
114	115	-1.0	-1.0	-1.0	-1.0
115	116	-1.0	-1.0	-1.0	-1.0
116	117	-1.0	-1.0	-1.0	-1.0
117	118	-1.0	-1.0	-1.0	-1.0
118	119	-1.0	-1.0	-1.0	-1.0
119	120	-1.0	-1.0	-1.0	-1.0
120	121	-1.0	-1.0	-1.0	-1.0
121	122	-1.0	-1.0	-1.0	-1.0
122	123	-1.0	-1.0	-1.0	-1.0
123	124	-1.0	-1.0	-1.0	-1.0
124	125	-1.0	-1.0	-1.0	-1.0
...	...	...	...	...	...
4473	4474	-1.0	-1.0	-1.0	-1.0
4474	4475	-1.0	-1.0	-1.0	-1.0
4475	4476	-1.0	-1.0	-1.0	-1.0
4476	4477	-1.0	-1.0	-1.0	-1.0
4477	4478	-1.0	-1.0	-1.0	-1.0
4478	4479	-1.0	-1.0	-1.0	-1.0
4479	4480	-1.0	-1.0	-1.0	-1.0
4480	4481	-1.0	-1.0	-1.0	-1.0
4481	4482	-1.0	-1.0	-1.0	-1.0
4482	4483	-1.0	-1.0	-1.0	-1.0
4483	4484	-1.0	-1.0	-1.0	-1.0
4484	4485	-1.0	-1.0	-1.0	-1.0
4485	4486	-1.0	-1.0	-1.0	-1.0
4486	4487	-1.0	-1.0	-1.0	-1.0
4487	4488	-1.0	-1.0	-1.0	-1.0
4488	4489	-1.0	-1.0	-1.0	-1.0
4489	4490	-1.0	-1.0	-1.0	-1.0
4490	4491	-1.0	-1.0	-1.0	-1.0
4491	4492	-1.0	-1.0	-1.0	-1.0
4492	4493	-1.0	-1.0	-1.0	-1.0
4493	4494	-1.0	-1.0	-1.0	-1.0
4494	4495	-1.0	-1.0	-1.0	-1.0
4495	4496	-1.0	-1.0	-1.0	-1.0
4496	4497	-1.0	-1.0	-1.0	-1.0
4497	4498	-1.0	-1.0	-1.0	-1.0
4498	4499	-1.0	-1.0	-1.0	-1.0
4499	4500	-1.0	-1.0	-1.0	-1.0
4500	4501	-1.0	-1.0	-1.0	-1.0
4501	4502	-1.0	-1.0	-1.0	-1.0
4502	4503	-1.0	-1.0	-1.0	-1.0

	restaurant_id	food	service	ambience	deals
999	1000	-1.0	-1.0	-1.0	-1.0
1000	1001	-1.0	-1.0	-1.0	-1.0
1001	1002	-1.0	-1.0	-1.0	-1.0
1002	1003	-1.0	-1.0	-1.0	-1.0
1003	1004	-1.0	-1.0	-1.0	-1.0
1004	1005	-1.0	-1.0	-1.0	-1.0
1005	1006	-1.0	-1.0	-1.0	-1.0
1006	1007	-1.0	-1.0	-1.0	-1.0
1007	1008	-1.0	-1.0	-1.0	-1.0
1008	1009	-1.0	-1.0	-1.0	-1.0
1009	1010	-1.0	-1.0	-1.0	-1.0
1010	1011	-1.0	-1.0	-1.0	-1.0
1011	1012	-1.0	-1.0	-1.0	-1.0
1012	1013	-1.0	-1.0	-1.0	-1.0
1013	1014	-1.0	-1.0	-1.0	-1.0
1014	1015	-1.0	-1.0	-1.0	-1.0
1015	1016	-1.0	-1.0	-1.0	-1.0
1016	1017	-1.0	-1.0	-1.0	-1.0
1017	1018	-1.0	-1.0	-1.0	-1.0
1018	1019	-1.0	-1.0	-1.0	-1.0
1019	1020	-1.0	-1.0	-1.0	-1.0
1020	1021	-1.0	-1.0	-1.0	-1.0
1021	1022	-1.0	-1.0	-1.0	-1.0
1022	1023	-1.0	-1.0	-1.0	-1.0
1023	1024	-1.0	-1.0	-1.0	-1.0
1024	1025	-1.0	-1.0	-1.0	-1.0
1025	1026	-1.0	-1.0	-1.0	-1.0
1026	1027	-1.0	-1.0	-1.0	-1.0
1027	1028	-1.0	-1.0	-1.0	-1.0
1028	1029	-1.0	-1.0	-1.0	-1.0
...	...	...	...	...	...
4473	4474	-1.0	-1.0	-1.0	-1.0
4474	4475	-1.0	-1.0	-1.0	-1.0
4475	4476	-1.0	-1.0	-1.0	-1.0
4476	4477	-1.0	-1.0	-1.0	-1.0
4477	4478	-1.0	-1.0	-1.0	-1.0
4478	4479	-1.0	-1.0	-1.0	-1.0
4479	4480	-1.0	-1.0	-1.0	-1.0
4480	4481	-1.0	-1.0	-1.0	-1.0
4481	4482	-1.0	-1.0	-1.0	-1.0
4482	4483	-1.0	-1.0	-1.0	-1.0
4483	4484	-1.0	-1.0	-1.0	-1.0
4484	4485	-1.0	-1.0	-1.0	-1.0
4485	4486	-1.0	-1.0	-1.0	-1.0
4486	4487	-1.0	-1.0	-1.0	-1.0
4487	4488	-1.0	-1.0	-1.0	-1.0
4488	4489	-1.0	-1.0	-1.0	-1.0
4489	4490	-1.0	-1.0	-1.0	-1.0
4490	4491	-1.0	-1.0	-1.0	-1.0
4491	4492	-1.0	-1.0	-1.0	-1.0
4492	4493	-1.0	-1.0	-1.0	-1.0
4493	4494	-1.0	-1.0	-1.0	-1.0
4494	4495	-1.0	-1.0	-1.0	-1.0
4495	4496	-1.0	-1.0	-1.0	-1.0
4496	4497	-1.0	-1.0	-1.0	-1.0
4497	4498	-1.0	-1.0	-1.0	-1.0
4498	4499	-1.0	-1.0	-1.0	-1.0
4499	4500	-1.0	-1.0	-1.0	-1.0
4500	4501	-1.0	-1.0	-1.0	-1.0
4501	4502	-1.0	-1.0	-1.0	-1.0
4502	4503	-1.0	-1.0	-1.0	-1.0